/*==============================================================================
"Canada - 	Labor variables (1971) 
		Initial Conditions (1971)- age*,edatt*,inmigration,ind*,serv*

Outline:
This do file brings in provincial level data on
I. 	age structure 
II. 	Education 
III. 	migration
V. 		employment,female employment, unemployment, 1971
VI.		Industry breakdown, 1971

Note on migration: the census variable mgrate5 is used to count in-migrants
	into each province within the last 5 years.
	Annual net interprovincial migration in a given year comes from another
	source (see step0121). A later do file calculates:
	outmigration = net interprovincial migration - inmigration_5_years/5

Source of data used in this dofile: 
1971 Census, downloaded from IPUMS-International 

==============================================================================*/

* Session setup: disable paging, start from an empty session, and move to the
* scratch folder that holds the 1971 census extract.
set more off
clear all

cd "$scratch/CA_census_1971"

* Decompress the raw extract, then run the accompanying do-file
* (presumably the IPUMS-supplied script that reads the .dat file into memory
* -- confirm against ipumsi_00014.do).
shell uncompress ipumsi_00014.dat.Z
quietly do ipumsi_00014.do 

*I. 	age structure

* One indicator per five-year age band (age_0_4, age_5_9, ..., age_65_69):
* equal to 1 inside the band and missing otherwise, so that a later
* (sum) collapse yields head counts.
forvalues lo = 0(5)65 {
	local hi = `lo' + 4
	quietly generate age_`lo'_`hi' = 1 if inrange(age, `lo', `hi')
}

* Open-ended top band.
* NOTE(review): "age >= 70" is also true for a missing age and for any high
* numeric code used for "unknown" age -- confirm the extract has none.
quietly generate age_70_plus = 1 if age >= 70

* Every person counts towards the total.
generate age_total = 1

*II. *Years of school

* Create the full set of placeholders edatt_yrs_0 ... edatt_yrs_18 (all
* missing). Years with no matching educca code below stay missing.
forvalues yrs = 0/18 {
	generate edatt_yrs_`yrs' = .
}

* Parallel lists: the i-th educca code is assigned the i-th number of years.
* (Codes 323 and 331 map to 17 and 16 years respectively, as in the original
* assignment.)
local educ_codes 0 110 120 131 132 133 134 321 322 323 331 332
local educ_years 0   3   7  10  11  12  13  14  15  17  16  18

* Flag only people aged 15 or more with school != 1
* (presumably "not currently attending school" -- confirm the school coding).
local n_codes : word count `educ_codes'
forvalues i = 1/`n_codes' {
	local code : word `i' of `educ_codes'
	local yrs  : word `i' of `educ_years'
	replace edatt_yrs_`yrs' = 1 if educca == `code' & age >= 15 & school != 1
}

* Row total over all edatt* indicators (missing treated as zero).
egen edatt_total = rsum(edatt*)

*III. 	migration

*the following variables refer to movements of people already in Canada five years previously
* inmigration is 1 for mgrate5==20 and missing otherwise, so a later (sum)
* collapse by current province counts in-migrants.
gen inmigration = 1 if mgrate5==20  //1 if individual moved inter-provincially within last 5 years

* Snapshot of the person-level data with all indicators built so far; each of
* the collapse sections below reloads it.
* NOTE(review): the tempfile is referenced as `precollapse.dta' here and in the
* later use commands; the conventional reference is `precollapse' -- confirm
* this form resolves to the temporary file as intended.
tempfile precollapse
save `precollapse.dta'

*	Collapse by Province

* Age-band and schooling indicators: frequency-weighted (wtper) sums per
* province, stored in tempfile temp1 for the merge further down.
use `precollapse.dta', clear
collapse (sum) age_* edatt* [fw=wtper], by(provca)

sort provca
tempfile temp1
save `temp1.dta'

*inmigration
* Weighted count of inter-provincial movers, grouped by the province they live
* in now; this is in-migration to the province. Stored in tempfile temp2.
use `precollapse.dta', clear

collapse (sum) inmigration [fw=wtper], by(provca)

sort provca
tempfile temp2
save `temp2.dta'

*V. Employment, unemployment, labour force and population, 1971

use `precollapse.dta', clear

* Person-level indicators (1 or missing). The labour-market flags are limited
* to age >= 14; empstat 1 and 2 are presumably "employed" and "unemployed"
* (inferred from the variable names -- confirm against the codebook).
generate POP_CENS1971_CA   = 1
generate EMP_CENS1971_CA   = 1 if empstat == 1 & age >= 14
generate EMP_female        = 1 if empstat == 1 & age >= 14 & sex == 2
generate UNEMP_CENS1971_CA = 1 if empstat == 2 & age >= 14
generate LF_CENS1971_CA    = 1 if inlist(empstat, 1, 2) & age >= 14

* Weighted province totals, then rescale every aggregate to thousands.
local labor_vars EMP_CENS1971_CA EMP_female UNEMP_CENS1971_CA LF_CENS1971_CA POP_CENS1971_CA
collapse (sum) `labor_vars' [fw=wtper], by(provca)

foreach v of local labor_vars {
	replace `v' = `v' / 1000
}

sort provca
tempfile temp3
save `temp3.dta'

*VI. Employment by industry

clear 
use `precollapse.dta'

* Industry indicators from indgen: 1 if the person is in the sector, missing
* otherwise. ind_metals and ind_other have no matching indgen code here and
* stay missing (they collapse to 0).
gen ind_agro 	= 1 if indgen==10 
gen ind_energy 	= 1 if indgen==40
gen ind_mining 	= 1 if indgen==20
gen ind_construction = 1 if indgen==50
gen ind_metals 	=.
gen ind_mnfg 	= 1 if indgen==30
gen ind_other	=.

* Industry total EXCLUDING agriculture.
* The previous code summed all ind_* indicators and then ran
* "replace ind_total = ind_total - ind_agro". Because ind_agro is missing for
* everyone outside agriculture, that subtraction set ind_total to missing for
* all non-agricultural workers (and to 0 for agricultural workers), so the
* collapsed ind_total was 0 in every province. Summing the non-agricultural
* indicators directly gives the intended total; rowtotal() treats missing
* values as zero.
egen ind_total 	= rowtotal(ind_energy ind_mining ind_construction ind_metals ind_mnfg ind_other)
  
* Service-sector indicators and their row total.
gen serv_commerce  = 1 if indgen==60
gen serv_transport = 1 if indgen==80
gen serv_credit    = 1 if indgen==90
gen serv_admin     = 1 if indgen==100
gen serv_other     = 1 if indgen>=110 & indgen<=114
egen serv_total    = rowtotal(serv_commerce-serv_other)

* Weighted province totals. The range ind_agro-serv_total relies on the
* variables having been created in the order above.
collapse (sum) ind_agro-serv_total [fw=wtper], by(provca)

sort provca

*===============================================================================
* Merge above initial conditions
*===============================================================================

* The data in memory are the province-level industry/service totals. Attach
* the age/education (temp1), in-migration (temp2) and labour-market (temp3)
* aggregates one province row at a time, without keeping a _merge variable.
sort provca
merge 1:1 provca using `temp1.dta', nogenerate
merge 1:1 provca using `temp2.dta', nogenerate
merge 1:1 provca using `temp3.dta', nogenerate

*===============================================================================
* Create observation at country level, create Canada NUTS equivalent 
*===============================================================================

* Keep the province-level rows aside, reduce the data in memory to a single
* national row of sums, then append the province rows back underneath it.
* Note: inmigration is not part of this collapse list, so the national row
* has a missing inmigration value after the append.
tempfile append
save `append.dta'

collapse (sum) ind_* serv_* age_* edatt_* POP* LF* EMP* UNEMP*

* Region code 1 marks the national row; the appended province rows start
* with region missing and are coded below.
generate region = 1

append using `append.dta'

* provca 2..10 map to the consecutive codes 124001..124009;
* provca 1 is assigned the last code, 124010.
forvalues p = 2/10 {
	replace region = 123999 + `p' if provca == `p'
}
replace region = 124010 if provca == 1

* Value labels for the region codes (national row plus the ten provinces),
* attached to the region variable.
label define region_wvs_labels ///
	1      "Canada" ///
	124001 "Prince Edward Island" ///
	124002 "Nova Scotia" ///
	124003 "New Brunswick" ///
	124004 "Quebec" ///
	124005 "Ontario" ///
	124006 "Manitoba" ///
	124007 "Saskatchewan" ///
	124008 "Alberta" ///
	124009 "British Columbia" ///
	124010 "Newfoundland and Labrador"

label values region region_wvs_labels

* String region identifier: "Canada" for the national row and
* "CA: <province name>" for region codes 124001..124010. Rows with any other
* region value keep the empty string.
generate nuts = ""
replace nuts = "Canada" if region == 1

* Province names listed in region-code order (124001 first).
local k = 0
foreach prov in "Prince Edward Island" "Nova Scotia" "New Brunswick" "Quebec" "Ontario" "Manitoba" "Saskatchewan" "Alberta" "British Columbia" "Newfoundland and Labrador" {
	local ++k
	replace nuts = "CA: `prov'" if region == 124000 + `k'
}

* Census year and the female share of employment, in percent.
generate year = 1971
generate EMP_share_female = EMP_female / EMP_CENS1971_CA * 100

* Labour file: year, region name and the four labour aggregates only.
* preserve/restore brings the full data back afterwards.
preserve
keep year POP_CENS1971_CA LF_CENS1971_CA EMP_CENS1971_CA UNEMP_CENS1971_CA nuts
save "$dta_files/CA_CENS1971_labor", replace
restore

* Initial-conditions file: everything except the four aggregates that were
* written to the labour file.
drop POP_CENS1971_CA LF_CENS1971_CA EMP_CENS1971_CA UNEMP_CENS1971_CA
save "$dta_files/IC_CA_CENS1971", replace

* Re-compress the raw extract in the current (scratch) directory.
shell compress ipumsi_00014.dat
